set.seed(1)
addTimeStamp <- function(saveInfo, tstart, info){
timeDiff <- abs(round(difftime(Sys.time(), tstart, units = "mins"), 3))
print(paste0("Adding Time Stamp : ", info , ", ", timeDiff, " minutes from start..."))
saveInfo$timeStamps[[length(saveInfo$timeStamps) + 1]] <- list(
info = info,
time = timeDiff
)
saveInfo
}
print(Sys.time())
## [1] "2020-02-14 11:54:09 PST"
#Print Input Params
print(params)
## $jobAttempt
## [1] 1
##
## $input
## [1] "/scratch/users/jgranja/BenchmarksLarge/outputSmall/Signac/Projects/Heme_Small_Rep1/Heme_Small_Rep1-Signac-4-Clustering-Save.rds"
##
## $project
## [1] "Heme_Small_Rep1"
##
## $projectMetadata
## [1] "Project_Metadata.tsv"
##
## $threads
## [1] 20
##
## $out1
## [1] "/scratch/users/jgranja/BenchmarksLarge/outputSmall/Signac/Projects/Heme_Small_Rep1/Heme_Small_Rep1-Signac-5-Embedding-Benchmark.rds"
##
## $out2
## [1] "/scratch/users/jgranja/BenchmarksLarge/outputSmall/Signac/Projects/Heme_Small_Rep1/Heme_Small_Rep1-Signac-5-Embedding-Save.rds"
#Get Project Metadata
df <- data.frame(readr::read_tsv(paste0(params$projectMetadata)))
## Parsed with column specification:
## cols(
## Project = col_character(),
## Metadata = col_character()
## )
#Get Sample Metadata
metadata <- data.frame(readr::read_tsv(df[paste0(df[,1]) == paste0(params$project), 2]))
## Parsed with column specification:
## cols(
## Sample = col_character(),
## Genome = col_character(),
## Path = col_character()
## )
print(metadata)
## Sample Genome
## 1 BMMC_R1 Hg19
## 2 BMMC_R2 Hg19
## 3 CD34_R1 Hg19
## 4 CD34_R2 Hg19
## 5 CD34_R3 Hg19
## Path
## 1 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/BMMC_Healthy_10x_R1.fragments.tsv.gz
## 2 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/BMMC_Healthy_10x_R2.fragments.tsv.gz
## 3 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/CD34_A_R1.fragments.tsv.gz
## 4 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/CD34_A_R2.fragments.tsv.gz
## 5 /oak/stanford/groups/howchang/users/jgranja/ArchR_scATAC/data/fragments/hg19/mpal/healthy/CD34_R3.fragments.tsv.gz
#Input Files
#inputFiles <- paste0(metadata$Path)
#names(inputFiles) <- metadata$Sample
#print(inputFiles)
#Genome
genome <- tolower(paste0(metadata$Genome[1]))
#Working Directory Relative to project
owd <- getwd()
setwd(dirname(paste0(params$out1)))
#Start Time
saveInfo <- list(timeStamps =
list(
list(info = "Start", time = 0)
),
info = list()
)
#Load R Libraries
library(Signac)
library(Seurat)
library(GenomeInfoDb)
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: S4Vectors
## Loading required package: stats4
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
##
## expand.grid
## Loading required package: IRanges
library(GenomicRanges)
library(ggplot2)
#1. Seurat Object
seuratObj <- readRDS(params$input)
print(seuratObj)
## An object of class Seurat
## 590637 features across 28388 samples within 1 assay
## Active assay: peaks (590637 features)
## 1 dimensional reduction calculated: lsi
print(paste0("Object Size = ", round(object.size(seuratObj)/10^9, 4), " GB"))
## [1] "Object Size = 6.149 GB"
tstart <- Sys.time()
saveInfo <- addTimeStamp(saveInfo, tstart, info = "Input")
## [1] "Adding Time Stamp : Input, 0 minutes from start..."
#2. UMAP
seuratObj <- RunUMAP(object = seuratObj, reduction = 'lsi', dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 11:55:16 UMAP embedding parameters a = 0.9922 b = 1.112
## 11:55:16 Read 28388 rows and found 30 numeric columns
## 11:55:16 Using Annoy for neighbor search, n_neighbors = 30
## 11:55:16 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 11:55:22 Writing NN index file to temp file /tmp/Rtmpj9VA3Z/file2c21c45a8126b
## 11:55:22 Searching Annoy index using 1 thread, search_k = 3000
## 11:55:33 Annoy recall = 100%
## 11:55:34 Commencing smooth kNN distance calibration using 1 thread
## 11:55:38 Initializing from normalized Laplacian + noise
## 11:55:39 Commencing optimization for 200 epochs, with 1183462 positive edges
## 11:56:13 Optimization finished
print(paste0("Object Size = ", round(object.size(seuratObj)/10^9, 4), " GB"))
## [1] "Object Size = 6.152 GB"
saveInfo <- addTimeStamp(saveInfo, tstart, info = "UMAP")
## [1] "Adding Time Stamp : UMAP, 0.947 minutes from start..."
#3. Plot UMAP
DimPlot(object = seuratObj, group.by = 'sampleName', label = TRUE, reduction = "umap") + NoLegend()
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.

DimPlot(object = seuratObj, label = TRUE, reduction = "umap") + NoLegend()

saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("plotUMAP"))
## [1] "Adding Time Stamp : plotUMAP, 0.982 minutes from start..."
#4. TSNE
seuratObj <- RunTSNE(object = seuratObj, reduction = 'lsi', dims = 1:30)
print(paste0("Object Size = ", round(object.size(seuratObj)/10^9, 4), " GB"))
## [1] "Object Size = 6.1574 GB"
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("TSNE"))
## [1] "Adding Time Stamp : TSNE, 3.199 minutes from start..."
#5. Plot TSNE
DimPlot(object = seuratObj, group.by = 'sampleName', label = TRUE, reduction = "tsne") + NoLegend()

DimPlot(object = seuratObj, label = TRUE, reduction = "tsne") + NoLegend()

saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("plotTSNE"))
## [1] "Adding Time Stamp : plotTSNE, 3.23 minutes from start..."
saveInfo <- addTimeStamp(saveInfo, tstart, info = paste0("Completed"))
## [1] "Adding Time Stamp : Completed, 3.23 minutes from start..."
print(Sys.time() - tstart)
## Time difference of 3.230458 mins
#Set Original Working Directory
#Save Output
setwd(owd)
saveRDS(saveInfo, params$out1)
saveRDS(seuratObj, params$out2)
#Print Session Info
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS Linux 7 (Core)
##
## Matrix products: default
## BLAS/LAPACK: /share/software/user/open/openblas/0.2.19/lib/libopenblasp-r0.2.19.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] ggplot2_3.2.1 GenomicRanges_1.38.0 GenomeInfoDb_1.22.0
## [4] IRanges_2.20.2 S4Vectors_0.24.3 BiocGenerics_0.32.0
## [7] Seurat_3.1.2 Signac_0.2.2
##
## loaded via a namespace (and not attached):
## [1] sn_1.5-5 plyr_1.8.5
## [3] igraph_1.2.4.2 lazyeval_0.2.2
## [5] splines_3.6.1 BiocParallel_1.20.1
## [7] listenv_0.8.0 TH.data_1.0-10
## [9] digest_0.6.23 htmltools_0.4.0
## [11] gdata_2.18.0 magrittr_1.5
## [13] memoise_1.1.0 BSgenome_1.54.0
## [15] cluster_2.1.0 ROCR_1.0-7
## [17] ggfittext_0.8.1 globals_0.12.5
## [19] Biostrings_2.54.0 readr_1.3.1
## [21] RcppParallel_4.4.4 matrixStats_0.55.0
## [23] R.utils_2.9.2 sandwich_2.5-1
## [25] prettyunits_1.1.1 colorspace_1.4-1
## [27] blob_1.2.1 rappdirs_0.3.1
## [29] ggrepel_0.8.1 xfun_0.12
## [31] dplyr_0.8.4 crayon_1.3.4
## [33] RCurl_1.98-1.1 jsonlite_1.6
## [35] survival_2.44-1.1 zoo_1.8-7
## [37] ape_5.3 glue_1.3.1
## [39] gtable_0.3.0 zlibbioc_1.32.0
## [41] XVector_0.26.0 leiden_0.3.2
## [43] DelayedArray_0.12.2 future.apply_1.4.0
## [45] scales_1.1.0 mvtnorm_1.0-12
## [47] DBI_1.1.0 bibtex_0.4.2.2
## [49] Rcpp_1.0.3 metap_1.3
## [51] plotrix_3.7-7 viridisLite_0.3.0
## [53] progress_1.2.2 reticulate_1.14
## [55] bit_1.1-15.1 rsvd_1.0.2
## [57] SDMTools_1.1-221.2 tsne_0.1-3
## [59] htmlwidgets_1.5.1 httr_1.4.1
## [61] gplots_3.0.1.2 RColorBrewer_1.1-2
## [63] TFisher_0.2.0 ica_1.0-2
## [65] farver_2.0.3 pkgconfig_2.0.3
## [67] XML_3.99-0.3 R.methodsS3_1.7.1
## [69] ggseqlogo_0.1 uwot_0.1.5
## [71] labeling_0.3 tidyselect_1.0.0
## [73] rlang_0.4.4 reshape2_1.4.3
## [75] AnnotationDbi_1.48.0 munsell_0.5.0
## [77] tools_3.6.1 RSQLite_2.2.0
## [79] ggridges_0.5.2 evaluate_0.14
## [81] stringr_1.4.0 yaml_2.2.1
## [83] npsurv_0.4-0 knitr_1.27
## [85] bit64_0.9-7 fitdistrplus_1.0-14
## [87] caTools_1.18.0 purrr_0.3.3
## [89] RANN_2.6.1 pbapply_1.4-2
## [91] future_1.16.0 nlme_3.1-140
## [93] R.oo_1.23.0 biomaRt_2.40.5
## [95] compiler_3.6.1 plotly_4.9.1
## [97] png_0.1-7 lsei_1.2-0
## [99] tibble_2.1.3 stringi_1.4.5
## [101] RSpectra_0.16-0 GenomicFeatures_1.36.4
## [103] lattice_0.20-38 Matrix_1.2-17
## [105] multtest_2.42.0 vctrs_0.2.2
## [107] mutoss_0.1-12 pillar_1.4.3
## [109] lifecycle_0.1.0 Rdpack_0.11-1
## [111] lmtest_0.9-37 RcppAnnoy_0.0.14
## [113] data.table_1.12.8 cowplot_1.0.0
## [115] bitops_1.0-6 irlba_2.3.3
## [117] gbRd_0.4-11 gggenes_0.4.0
## [119] patchwork_1.0.0 rtracklayer_1.46.0
## [121] R6_2.4.1 KernSmooth_2.23-15
## [123] gridExtra_2.3 codetools_0.2-16
## [125] MASS_7.3-51.4 gtools_3.8.1
## [127] assertthat_0.2.1 SummarizedExperiment_1.16.1
## [129] withr_2.1.2 GenomicAlignments_1.22.1
## [131] sctransform_0.2.1 Rsamtools_2.2.1
## [133] mnormt_1.5-5 multcomp_1.4-12
## [135] GenomeInfoDbData_1.2.2 hms_0.5.3
## [137] grid_3.6.1 tidyr_1.0.2
## [139] rmarkdown_2.1 Rtsne_0.15
## [141] numDeriv_2016.8-1.1 Biobase_2.46.0